Exploratory Visualizations
Wind Speed 1873
# Reshape the 1873 journal into one row per date, period of the day and
# individual wind-speed term, then bucket every term into a broad category.
# Terms that appear in none of the lists below get an NA category.
wind_speed_1873 <- journal_1873 %>%
  select(
    date_mdy,
    month,
    wind_speed_am,
    wind_speed_pm,
    wind_speed_night
  ) %>%
  mutate(year = "1873") %>%
  # Dropping the shared column prefix leaves "am" / "pm" / "night" as period.
  pivot_longer(
    cols = starts_with("wind_speed"),
    names_to = "period",
    names_prefix = "wind_speed_",
    values_to = "wind_speed"
  ) %>%
  # NOTE(review): entries are split on a bare comma, so a cell written as
  # "a, b" would keep a leading space on "b" and miss every category below;
  # confirm against the source data.
  separate_rows(wind_speed, sep = ",") %>%
  mutate(
    category = case_when(
      wind_speed %in% c("blustering", "very blustering") ~ "blustering",
      wind_speed %in% c(
        "breezy", "good breeze", "very fresh breeze", "breezed up",
        "fresh breeze", "heavy breeze", "smart breeze", "strong breeze"
      ) ~ "breeze",
      wind_speed %in% c(
        "blowing very heavy", "fresh blow", "heavy blow", "very heavy blow",
        "blowy", "blowing", "blowing hard", "blowing very hard"
      ) ~ "blow",
      wind_speed %in% c("fresh gale", "gale") ~ "gale",
      wind_speed %in% c("moderate", "quite moderate", "very moderate") ~
        "moderate",
      wind_speed %in% c("pleasant", "quite pleasant", "very pleasant") ~
        "pleasant",
      wind_speed %in% c("calm", "perfectly calm") ~ "calm",
      wind_speed %in% c("scant wind", "heavy winds") ~ "wind",
      wind_speed %in% c(
        "rough", "squall", "strong", "very light", "heavy", "baffling",
        "a light air", "variable", "fair"
      ) ~ "other intensities"
    )
  )
# Frequency of each wind-speed term in 1873, one panel per period of the day.
wind_speed_1873 %>%
  # Missing entries are stored as the literal string "NA"; turn them into
  # real missing values so drop_na() can remove them.
  mutate(wind_speed = na_if(wind_speed, "NA")) %>%
  drop_na(wind_speed, period, category) %>%
  # count() returns ungrouped output, so no `.groups` message is emitted and
  # the following mutate() does not run on a grouped table.
  count(wind_speed, period, category) %>%
  mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
  ggplot(aes(x = wind_speed, y = n, fill = category)) +
  geom_col(position = "dodge") +
  labs(
    title = "Wind Vocabulary Frequency by Period of the Day 1873",
    x = "Wind Speed Vocabulary",
    y = "Frequency",
    fill = "Category"
  ) +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~ period)

# Overall frequency of each wind-speed term in 1873 on a polar bar chart.
wind_speed_1873 %>%
  # Missing entries are stored as the literal string "NA".
  mutate(wind_speed = na_if(wind_speed, "NA")) %>%
  drop_na(wind_speed, period, category) %>%
  # Total across periods: this chart has no period facet, so separate
  # per-period rows would share the same x position and fill and be drawn on
  # top of each other instead of adding up.
  count(wind_speed, category) %>%
  ggplot(aes(x = wind_speed, y = n, fill = category)) +
  geom_col(position = "dodge") +
  labs(
    title = "General Wind Vocabulary Frequency 1873",
    x = "Wind Speed Vocabulary",
    y = "Frequency",
    fill = "Category"
  ) +
  theme_minimal(base_size = 7) +
  coord_polar() +
  scale_y_log10() # to better see smaller frequencies

# Pie chart of the 1873 wind vocabulary by category.
wind_speed_1873 %>%
  # Missing entries are stored as the literal string "NA".
  mutate(wind_speed = na_if(wind_speed, "NA")) %>%
  drop_na(wind_speed, period, category) %>%
  # One row per category is all the pie needs; the stacked slices add up to
  # the same totals as the finer per-term, per-period counts did.
  count(category) %>%
  ggplot(aes(x = "", y = n, fill = category)) +
  geom_col(width = 1) + # 1 for pie chart effect
  coord_polar("y", start = 0) +
  # theme_void() is a complete theme, so an earlier theme_minimal() had no
  # effect and is not applied here.
  theme_void() +
  labs(
    title = " 1873 Wind Vocabulary by Category",
    x = NULL,
    y = "Frequency",
    fill = "Category"
  )

Wind Speed Comparison 1873-1874
# Reshape the 1874 journal into one row per date, period of the day and
# individual wind-speed term, then bucket every term into a broad category.
# Terms that appear in none of the lists below get an NA category.
wind_speed_1874 <- journal_1874 %>%
  select(
    date_mdy,
    month,
    wind_speed_am,
    wind_speed_pm,
    wind_speed_night
  ) %>%
  # Year is stored as text, like in the 1873 table and the wind-direction
  # tables, so the yearly tables can be stacked and joined without coercion.
  mutate(year = "1874") %>%
  # Dropping the shared column prefix leaves "am" / "pm" / "night" as period.
  pivot_longer(
    cols = starts_with("wind_speed"),
    names_to = "period",
    names_prefix = "wind_speed_",
    values_to = "wind_speed"
  ) %>%
  # NOTE(review): entries are split on a bare comma, so a cell written as
  # "a, b" would keep a leading space on "b" and miss every category below;
  # confirm against the source data.
  separate_rows(wind_speed, sep = ",") %>%
  mutate(
    category = case_when(
      wind_speed %in% c("blustering", "very blustering") ~ "blustering",
      wind_speed %in% c(
        "breezy", "good breeze", "very fresh breeze", "breezed up",
        "fresh breeze", "heavy breeze", "smart breeze", "strong breeze"
      ) ~ "breeze",
      wind_speed %in% c(
        "blowing very heavy", "fresh blow", "heavy blow", "very heavy blow",
        "blowy", "blowing", "blowing hard", "blowing very hard"
      ) ~ "blow",
      wind_speed %in% c("fresh gale", "gale") ~ "gale",
      wind_speed %in% c("moderate", "quite moderate", "very moderate") ~
        "moderate",
      wind_speed %in% c("pleasant", "quite pleasant", "very pleasant") ~
        "pleasant",
      wind_speed %in% c("calm", "perfectly calm") ~ "calm",
      wind_speed %in% c("scant wind", "heavy winds") ~ "wind",
      wind_speed %in% c(
        "rough", "squall", "strong", "very light", "heavy", "baffling",
        "a light air", "variable", "fair"
      ) ~ "other intensities"
    )
  )
# Stack both years of wind-speed terms into one table.
combined_wind_speed <- rbind(wind_speed_1873, wind_speed_1874)

# Frequency of each wind-speed term, one panel per year.
combined_wind_speed %>%
  # Missing entries are stored as the literal string "NA".
  mutate(wind_speed = na_if(wind_speed, "NA")) %>%
  drop_na(wind_speed, period, category, year) %>%
  # count() returns ungrouped output, so no `.groups` message is emitted.
  count(wind_speed, category, year) %>%
  ggplot(aes(x = wind_speed, y = n, fill = category)) +
  geom_col(position = "dodge") +
  labs(
    title = "Yearly Comparison of Wind Speed Vocabulary",
    subtitle = "Colored by Overarching Categories in 1873 and 1874",
    x = "Wind Speed Vocabulary",
    y = "Frequency",
    fill = "Category"
  ) +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~ year)

# Wind-speed category frequencies per month, with both years stacked.
combined_wind_speed %>%
  # Missing entries are stored as the literal string "NA".
  mutate(wind_speed = na_if(wind_speed, "NA")) %>%
  drop_na(wind_speed, period, category, year) %>%
  # count() returns ungrouped output, so no `.groups` message is emitted.
  count(category, month, year, name = "frequency") %>%
  ggplot(aes(x = category, y = frequency, fill = as.factor(year))) +
  geom_col(position = "stack") +
  labs(
    title = "Monthly Comparison of Wind Speed Categories 1873-1874",
    x = "Category",
    y = "Frequency",
    fill = "Year"
  ) +
  facet_wrap(~ month) +
  coord_flip()

Wind Directions 1873
# Convert 16-point compass abbreviations to bearings in degrees.
#
# `wind_direction` is a vector of abbreviations such as "N", "NNE" or "SW".
# Returns a numeric vector of the same length: 0 for "N", increasing clockwise
# in steps of 22.5 up to 337.5 for "NNW". Anything that is not one of the 16
# abbreviations (including NA) maps to NA.
convert_wind_direction_to_degrees <- function(wind_direction) {
  compass_points <- c(
    "N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
    "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"
  )
  # The points are listed clockwise from north, 22.5 degrees apart.
  bearings <- (seq_along(compass_points) - 1) * 22.5
  # match() yields NA for unrecognised values, which indexes to NA_real_.
  bearings[match(as.character(wind_direction), compass_points)]
}
# One row per date, period of the day and individual wind direction for 1873,
# with the direction also expressed as a bearing in degrees.
wind_direction_1873_long <- journal_1873 %>%
  pivot_longer(
    cols = starts_with("wind_direction"),
    names_to = "period",
    values_to = "wind_direction"
  ) %>%
  # A cell may list several directions separated by ", ".
  separate_rows(wind_direction, sep = ", ") %>%
  mutate(
    period = case_when(
      period == "wind_direction_am" ~ "am",
      period == "wind_direction_pm" ~ "pm",
      period == "wind_direction_night" ~ "night"
    ),
    # Bearings are only filled in for the three recognised periods; any other
    # row keeps a missing bearing.
    wind_degrees = if_else(
      period %in% c("am", "pm", "night"),
      convert_wind_direction_to_degrees(wind_direction),
      NA_real_
    )
  ) %>%
  select(date_mdy, month, wind_direction, period, wind_degrees) %>%
  mutate(year = "1873")
# Shift a bearing in degrees back by a quarter turn and express it in radians,
# for use as the angular position in the polar plots below.
to_radians <- function(degrees) {
  (degrees - 90) * pi / 180
}
# Polar bar chart of how often each wind direction is mentioned in 1873.
wind_direction_1873_long %>%
  ggplot(aes(x = to_radians(wind_degrees))) +
  # after_stat() replaces the deprecated stat(); `bins` is not a geom_bar()
  # parameter (ggplot2 ignored it with a warning), so it is not passed.
  geom_bar(aes(fill = after_stat(count)), color = "black") +
  scale_fill_viridis_c(option = "plasma", name = "Frequency") +
  # Direction labels are drawn at a fixed height of 150 on the count axis.
  geom_text(
    aes(x = to_radians(wind_degrees), y = 150, label = wind_direction),
    size = 4,
    fontface = "bold",
    color = "black"
  ) +
  labs(
    title = "Wind Directions Mentioned, 1873",
    # Passed by name: as an unnamed argument this text never reached the plot.
    subtitle = "Only Directions Explicitly Specified by Period",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(axis.text = element_blank()) +
  coord_polar() +
  scale_y_continuous(
    name = "Frequency",
    trans = "log10"
  ) # Frequency scale (logarithmic)
## Warning: Removed 757 rows containing non-finite values (`stat_count()`).
## Warning: Removed 757 rows containing missing values (`geom_text()`).

# Polar bar chart of 1873 wind directions, one panel per period of the day.
wind_direction_1873_long %>%
  mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
  ggplot(aes(x = to_radians(wind_degrees))) +
  # after_stat() replaces the deprecated stat(); `bins` is not a geom_bar()
  # parameter (ggplot2 ignored it with a warning), so it is not passed.
  geom_bar(aes(fill = after_stat(count)), color = "black") +
  scale_fill_viridis_c(option = "plasma", name = "Frequency") +
  # Direction labels are drawn at a fixed height of 150 on the count axis.
  geom_text(
    aes(x = to_radians(wind_degrees), y = 150, label = wind_direction),
    size = 4,
    fontface = "bold",
    color = "black"
  ) +
  labs(
    title = "Wind Directions Mentioned by Period of the Day, 1873",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(axis.text = element_blank()) +
  coord_polar() +
  scale_y_continuous(
    name = "Frequency",
    trans = "log10"
  ) + # Frequency scale (logarithmic)
  facet_wrap(~ period)
## Warning: Removed 757 rows containing non-finite values (`stat_count()`).
## Warning: Removed 757 rows containing missing values (`geom_text()`).

# Polar bar chart of 1873 wind directions, one panel per month.
wind_direction_1873_long %>%
  ggplot(aes(x = to_radians(wind_degrees))) +
  # after_stat() replaces the deprecated stat(); `bins` is not a geom_bar()
  # parameter (ggplot2 ignored it with a warning), so it is not passed.
  geom_bar(aes(fill = after_stat(count)), color = "black") +
  scale_fill_viridis_c(option = "plasma", name = "Frequency") +
  # Direction labels are drawn at a fixed height of 150 on the count axis.
  geom_text(
    aes(x = to_radians(wind_degrees), y = 150, label = wind_direction),
    size = 4,
    fontface = "bold",
    color = "black"
  ) +
  labs(
    title = "Wind Directions Mentioned by Month, 1873",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(axis.text = element_blank()) +
  coord_polar() +
  scale_y_continuous(
    name = "Frequency",
    trans = "log10"
  ) + # Frequency scale (logarithmic)
  facet_wrap(~ month)
## Warning: Removed 757 rows containing non-finite values (`stat_count()`).
## Warning: Removed 757 rows containing missing values (`geom_text()`).

# Stacked bars of wind-direction mentions per month in 1873, split by period.
wind_direction_1873_long %>%
  mutate(
    # Missing entries are stored as the literal string "NA".
    wind_direction = na_if(wind_direction, "NA"),
    period = factor(period, levels = c("am", "pm", "night"))
  ) %>%
  drop_na(wind_direction, period, month) %>%
  ggplot(aes(x = wind_direction, fill = period)) +
  geom_bar(position = "stack", color = "black") +
  labs(
    title = "Bar Chart of Monthly Wind Directions Frequencies in 1873",
    x = "Wind Direction",
    y = "Frequency"
  ) +
  coord_flip() +
  facet_wrap(~ month, nrow = 2)

Wind Direction Comparison 1873-1874
# One row per date, period of the day and individual wind direction for 1874,
# with the direction also expressed as a bearing in degrees.
wind_direction_1874_long <- journal_1874 %>%
  pivot_longer(
    cols = starts_with("wind_direction"),
    names_to = "period",
    values_to = "wind_direction"
  ) %>%
  # A cell may list several directions separated by ", ".
  separate_rows(wind_direction, sep = ", ") %>%
  mutate(
    period = case_when(
      period == "wind_direction_am" ~ "am",
      period == "wind_direction_pm" ~ "pm",
      period == "wind_direction_night" ~ "night"
    ),
    # Bearings are only filled in for the three recognised periods; any other
    # row keeps a missing bearing.
    wind_degrees = if_else(
      period %in% c("am", "pm", "night"),
      convert_wind_direction_to_degrees(wind_direction),
      NA_real_
    )
  ) %>%
  select(date_mdy, month, wind_direction, period, wind_degrees) %>%
  mutate(year = "1874")
# Stack both years of wind directions into one table.
combined_wind_direction <- rbind(
  wind_direction_1873_long,
  wind_direction_1874_long
)

# Stacked bars of wind-direction mentions, coloured by year.
combined_wind_direction %>%
  # Missing entries are stored as the literal string "NA".
  mutate(wind_direction = na_if(wind_direction, "NA")) %>%
  drop_na(wind_direction, period, year) %>%
  ggplot(aes(x = wind_direction, fill = year)) +
  geom_bar(position = "stack", color = "black") +
  labs(
    title = "Bar Chart of Yearly Wind Directions Frequencies",
    x = "Wind Direction",
    y = "Frequency"
  ) +
  coord_flip()

Wind Direction and Wind Speed
# Pair every wind direction with every wind speed recorded for the same date
# and period, keeping only rows where both are known.
combined_wind <- full_join(
  combined_wind_direction,
  combined_wind_speed,
  by = c("date_mdy", "month", "year", "period"),
  # A period can list several directions and several speeds, so each key can
  # match many rows on both sides; declaring it makes the fan-out explicit
  # and silences the join warning.
  relationship = "many-to-many"
) %>%
  mutate(
    # Missing entries are stored as the literal string "NA".
    wind_direction = na_if(wind_direction, "NA"),
    wind_speed = na_if(wind_speed, "NA")
  ) %>%
  drop_na(wind_direction, wind_speed)
# Number of co-occurrences of each wind speed / wind direction pair.
# count() returns ungrouped output, so no `.groups` message is emitted.
freq_combined_wind <- combined_wind %>%
  count(wind_speed, wind_direction, name = "frequency")

# Heat map of wind speed against wind direction.
freq_combined_wind %>%
  ggplot(aes(x = wind_direction, y = wind_speed, fill = frequency)) +
  geom_tile() +
  # A single fill scale: a second scale_fill_viridis_c() would only replace
  # the first. direction = -1 makes darker tiles mean higher frequency.
  scale_fill_viridis_c(direction = -1) +
  labs(
    title = "Wind Speed and Wind Direction Heat Map",
    x = "Wind Direction",
    y = "Wind Speed",
    fill = "Frequency"
  )

# Heat map of wind speed against wind direction, one panel per year.
combined_wind %>%
  # count() returns ungrouped output, so no `.groups` message is emitted.
  count(wind_speed, wind_direction, year, name = "frequency") %>%
  ggplot(aes(x = wind_direction, y = wind_speed, fill = frequency)) +
  geom_tile() +
  # A single fill scale: a second scale_fill_viridis_c() would only replace
  # the first. direction = -1 makes darker tiles mean higher frequency.
  scale_fill_viridis_c(direction = -1) +
  labs(
    title = "Wind Speed and Wind Direction Heat Map by Year",
    x = "Wind Direction",
    y = "Wind Speed",
    fill = "Frequency"
  ) +
  facet_wrap(~ year)

Weather Conditions 1873
# Reshape the 1873 journal into one row per date, period of the day and
# individual weather term, then bucket every term into a broad category.
# Terms that appear in none of the lists below get an NA category.
weather_con_1873_long <- journal_1873 %>%
  select(
    date_mdy,
    month,
    weather_condition_am,
    weather_condition_pm,
    weather_condition_night
  ) %>%
  mutate(year = "1873") %>%
  # Dropping the shared column prefix leaves "am" / "pm" / "night" as period.
  pivot_longer(
    cols = starts_with("weather_condition"),
    names_to = "period",
    names_prefix = "weather_condition_",
    values_to = "weather_condition"
  ) %>%
  # NOTE(review): entries are split on a bare comma, so a cell written as
  # "a, b" would keep a leading space on "b" and miss every category below;
  # confirm against the source data.
  separate_rows(weather_condition, sep = ",") %>%
  mutate(
    category = case_when(
      weather_condition %in% c(
        "chilly", "cold", "cool", "extremely cold", "very cold", "quite cold"
      ) ~ "cold",
      # Single "pleasant" rule: the terms were previously spread over two
      # overlapping rules that both mapped to "pleasant".
      weather_condition %in% c(
        "pleasant", "quite pleasant", "very pleasant"
      ) ~ "pleasant",
      weather_condition %in% c("very warm", "warm", "hot") ~ "warm",
      weather_condition %in% c("clear", "cleared up", "fine") ~ "clear",
      weather_condition %in% c("overcast", "cloudy") ~ "cloud",
      weather_condition %in% c("calm", "perfectly calm") ~ "calm",
      weather_condition %in% c(
        "foggy", "dense fog", "thick fog", "very foggy", "thick with fog",
        "very thick with fog"
      ) ~ "fog",
      weather_condition %in% c(
        "heavy showers", "very heavy shower", "shower", "showery",
        "a little showery"
      ) ~ "showers",
      weather_condition %in% c("drizzle", "drizzling rain") ~ "drizzle",
      weather_condition %in% c(
        "cold rain", "heavy rain storm", "heavy rain", "fine rain",
        "raining", "rainy", "rain", "rain spells", "rain squall",
        "rain storm", "moderate rain", "big rain storm", "very heavy rain"
      ) ~ "rain",
      weather_condition %in% c(
        "little snow", "snow sleet", "light snow", "thick snow",
        "pleasant snow", "snowy", "snow", "snow spells", "snow squall",
        "big snow storm", "snow storm", "snowing", "snowing fast",
        "moderate snow"
      ) ~ "snow",
      weather_condition %in% c(
        "stormy", "tough storm", "very heavy storm", "moderate rainstorm",
        "heavy storm"
      ) ~ "storm",
      weather_condition %in% c("thunder", "heavy thunder") ~ "thunder",
      weather_condition %in% c("sharp lightning", "lightning") ~ "lightning",
      weather_condition %in% c(
        "misty", "good weather", "moderate weather", "sun out", "hail"
      ) ~ "other"
    )
  )
# Colour assigned to each weather category.
category_colors <- c(
  "cold" = "#16324a",
  "pleasant" = "#ccf146",
  "warm" = "#b5a642",
  "clear" = "#9467bd",
  "cloud" = "#8c564b",
  "calm" = "#2ca02c",
  "fog" = "#e377c2",
  "showers" = "#1f77b4",
  "drizzle" = "#004f95",
  "rain" = "#17becf",
  "snow" = "#bbbbbb",
  "storm" = "#ff7f0e",
  "thunder" = "#d62728",
  "lightning" = "#ff9896",
  "other" = "#c5b0d5"
)

# Frequency of each weather term in 1873. The category is carried along (each
# term belongs to at most one category, so the row count is unchanged) and
# translated into a per-word colour; uncategorised terms are drawn in black.
weather_con_1873_freq <- weather_con_1873_long %>%
  # Missing entries are stored as the literal string "NA".
  mutate(weather_condition = na_if(weather_condition, "NA")) %>%
  drop_na(weather_condition) %>%
  count(weather_condition, category) %>%
  mutate(color = coalesce(unname(category_colors[category]), "black"))

# Word cloud of the 1873 weather vocabulary, coloured by category.
# Without ordered.colors = TRUE, wordcloud() treats `colors` as a palette
# running from least to most frequent word, which ignores the category names.
wordcloud(
  weather_con_1873_freq$weather_condition,
  weather_con_1873_freq$n,
  colors = weather_con_1873_freq$color,
  ordered.colors = TRUE,
  random.order = FALSE,
  scale = c(5, 1),
  min.freq = 1,
  max.words = Inf
)

# Frequency of each weather term in 1873, one panel per period of the day.
weather_con_1873_long %>%
  # Missing entries are stored as the literal string "NA".
  mutate(weather_condition = na_if(weather_condition, "NA")) %>%
  drop_na(weather_condition, period, category) %>%
  # count() returns ungrouped output, so no `.groups` message is emitted and
  # the following mutate() does not run on a grouped table.
  count(weather_condition, period, category) %>%
  mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
  ggplot(aes(x = weather_condition, y = n, fill = category)) +
  geom_col(position = "dodge") +
  labs(
    title = "Weather Condition Frequency by Period of the Day 1873",
    x = "Weather Condition Vocabulary",
    y = "Frequency",
    fill = "Category"
  ) +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~ period)

# Polar stacked bars of weather-category frequencies per month in 1873.
weather_con_1873_long %>%
  # Missing entries are stored as the literal string "NA".
  mutate(weather_condition = na_if(weather_condition, "NA")) %>%
  drop_na(weather_condition, category, month) %>%
  # count() on both columns gives the same rows without leaving the result
  # grouped by month.
  count(month, category) %>%
  # month.name is base R's January..December vector, used to put the months
  # in calendar order instead of alphabetical order.
  ggplot(aes(x = factor(month, levels = month.name), y = n, fill = category)) +
  geom_col(position = "stack") +
  scale_fill_brewer(palette = "Spectral") +
  labs(
    title = "Weather Frequency By Category per month",
    fill = "Category",
    x = "Month",
    y = "Number of occurrences"
  ) +
  coord_polar()

Letters
# Relabel the "read" letter statuses as "received" in both journals; every
# other value (including missing ones) is kept as it is.
journal_1873 <- journal_1873 %>%
  mutate(
    letter = recode(
      letter,
      "read" = "received",
      "read and write" = "received and write"
    )
  )

journal_1874 <- journal_1874 %>%
  mutate(
    letter = recode(
      letter,
      "read" = "received",
      "read and write" = "received and write"
    )
  )
# Keep only the letter-related columns of each journal, tag the year, and
# stack both years into one table.
letters_1873 <- journal_1873 %>%
  transmute(date_mdy, month, journal_entry, letter, notes, year = "1873")

letters_1874 <- journal_1874 %>%
  transmute(date_mdy, month, journal_entry, letter, notes, year = "1874")

combined_letters <- rbind(letters_1873, letters_1874)
# Frequency of every letter status, one panel per year.
combined_letters %>%
  count(letter, year) %>%
  # Missing statuses are stored as the literal string "NA".
  mutate(letter = na_if(letter, "NA")) %>%
  drop_na(letter) %>%
  ggplot(aes(x = letter, y = n, fill = year)) +
  geom_col(position = "dodge") +
  facet_wrap(~ year) +
  labs(
    title = "Letter Communication",
    subtitle = "For 1873 and 1874",
    x = "Letter Status",
    y = "Frequency",
    fill = "Year"
  )

# Same chart restricted to days on which a letter was actually mentioned.
combined_letters %>%
  # Missing statuses are stored as the literal string "NA".
  mutate(letter = na_if(letter, "NA")) %>%
  filter(!is.na(letter), letter != "no letter") %>%
  count(letter, year) %>%
  ggplot(aes(x = letter, y = n, fill = year)) +
  geom_col(position = "dodge") +
  facet_wrap(~ year) +
  labs(
    title = "Frequency of Letter Communication Mentioned",
    subtitle = "For 1873 and 1874",
    x = "Letter Status",
    y = "Frequency"
  ) +
  scale_y_continuous(breaks = seq(5, 30, 5))

Communication (Names & Letters)
# Only rows with letters written or received.
communication <- combined_letters %>%
  # Missing statuses are stored as the literal string "NA".
  mutate(letter = na_if(letter, "NA")) %>%
  filter(!is.na(letter), letter != "no letter")
# List produced by ChatGPT from the names, and their spelling variations,
# found in the communication table. Each variant is listed once.
# NOTE(review): this variable shares its name with base R's names() function;
# it is kept because extract_names() reads it under this name.
names <- c(
  "C.C. Burrill", "Charles C. Burrell", "Geo. N. Loomis", "GN Loomis",
  "Geo. W. Tracy", "Geo W Tracy", "Thomas Bunker", "T. Smallidge Boston",
  "Thos Bunker", "Joseph Bunker", "C. Stevens", "Reg of Deeds",
  "A.J. Gerrish", "Flora Gerrish", "Dresser and Ayer", "Reuben Rand",
  "R. Rand", "Geo.W Butler", "Geo N Loomis", "GW Butter", "G.W. Butter",
  "Henry W. Sargent", "T Smallidge", "Henry Geyer", "Galen Smith",
  "Thos Bunker Cranberry Isles", "L.D. Parsons Bangor", "SD.Parsons Bangor",
  "Benj Kittridge", "Benj Kittredge", "B. Kittridge", "J.T. Kingsley",
  "Kellly Kingsley", "E. Kingsley", "Geo A Dyer", "Arno Wiswell",
  "Hannah Goodwin", "Charles Norris", "Nathan Goodwin", "A.G. Brooks",
  "Mrs FRBunker", "AN Willye", "W. B. McCrate", "James Staples",
  "Capt. S.L.Tracy", "Wm Joy", "William Stevens", "E. H. Wallace",
  "Robt Miles", "Nickerson and Rideout", "David Ames", "Joy and Hutchings",
  "A.D. Crabtree"
)
# Extract every known correspondent name mentioned in `text`.
#
# `text` is a character vector. The result is a list with one character
# vector of matches per element of `text` (empty when nothing matches).
# The candidate names are read from the global `names` vector.
extract_names <- function(text) {
  candidates <- unique(names)
  # Try longer variants first so a short name cannot pre-empt a longer one
  # that starts with the same words.
  candidates <- candidates[order(nchar(candidates), decreasing = TRUE)]
  # Escape regex metacharacters so that, for example, the "." in
  # "C.C. Burrill" matches only a literal full stop, not any character.
  escaped <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", candidates)
  pattern <- paste(escaped, collapse = "|")
  regmatches(text, gregexpr(pattern, text))
}
# Store, for every communication row, the known names found in its entry.
communication[["names"]] <- sapply(
  X = communication[["journal_entry"]],
  FUN = extract_names
)